The function FnFMining wraps up all the helper functions in this notebook. It uses the IDs harvested in the previous notebook and saved in the NetWork files to collect profile data about these IDs.
The function takes the Twitter file of a category (NatBibTwitter.csv etc.) as input, opens the corresponding NetWork-[datestamp] file for each library listed there, saves a Friends file and a Followers file per library, and finally stores the list of generated filenames for the category, e.g. as NatBib_Files.txt.
The Friends and Followers files contain a list of dictionaries (LoD) with the keys friends_description, friends_user_id, friends_location, friends_screen_name and followers_description, followers_user_id, followers_location, followers_screen_name respectively; a sample entry is sketched below.
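To illustrate the structure only, this is roughly what one entry of a Friends file looks like; all values are invented placeholders, not data from the actual collection.

# Hypothetical example of one Friends entry (all values are invented placeholders)
sample_friend_row = {
    'friends_user_id': 12345678,
    'friends_screen_name': 'example_library',
    'friends_location': 'Berlin, Germany',
    'friends_description': 'A made-up library account, used here only to show the keys.',
}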
In [9]:
# Code from MTSW 2Ed.
# cf. https://github.com/ptwobrussell/Mining-the-Social-Web-2nd-Edition
import twitter
def oauth_login():
    # XXX: Go to http://twitter.com/apps/new to create an app and get values
    # for these credentials that you'll need to provide in place of these
    # empty string values that are defined as placeholders.
    # See https://dev.twitter.com/docs/auth/oauth for more information
    # on Twitter's OAuth implementation.
    CONSUMER_KEY = ''
    CONSUMER_SECRET = ''
    OAUTH_TOKEN = ''
    OAUTH_TOKEN_SECRET = ''

    auth = twitter.oauth.OAuth(OAUTH_TOKEN, OAUTH_TOKEN_SECRET,
                               CONSUMER_KEY, CONSUMER_SECRET)

    twitter_api = twitter.Twitter(auth=auth)
    return twitter_api
# Sample usage
twitter_api = oauth_login()
In [10]:
#importing libraries
import json #for pretty printing
import time #for calculating Tweets per day
import operator #for sorting dictionaries
from collections import Counter #for turning lists to dictionaries etc.
from prettytable import PrettyTable #for pretty printing in a table
# helper function prettyPrint taken from MTSW 2Ed.
def prettyPrint(Sp_1, Sp_2, counted_list_of_tuples):
    ptLang = PrettyTable(field_names=[Sp_1, Sp_2])
    [ptLang.add_row(kv) for kv in counted_list_of_tuples]
    ptLang.align[Sp_1], ptLang.align[Sp_2] = 'l', 'r'
    print ptLang
# helper functions: save and load results as csv-files
#import & export CSV
import csv
def impCSV(input_file):
    '''
    input_file = csv with keys: "URL", "Twitter"
    output = list of dictionaries
    '''
    f = open(input_file, 'r')
    d = csv.DictReader(f)
    LoD = []   # list of dictionaries
    for row in d:
        LoD.append(row)
    f.close()
    return LoD
def exp2CSV(listOfDict, filename):
    '''
    arguments = list of dictionaries, filename
    output = saves file to cwd (current working directory)
    '''
    outputfile = filename
    keyz = listOfDict[0].keys()
    f = open(outputfile, 'w')
    dict_writer = csv.DictWriter(f, keyz)
    dict_writer.writer.writerow(keyz)   # write the header row
    dict_writer.writerows(listOfDict)
    f.close()
In [11]:
# Both functions from MTSW 2 Ed.
import sys
from urllib2 import URLError
from httplib import BadStatusLine
def make_twitter_request(twitter_api_func, max_errors=10, *args, **kw):

    # A nested helper function that handles common HTTPErrors. Return an updated
    # value for wait_period if the problem is a 500 level error. Block until the
    # rate limit is reset if it's a rate limiting issue (429 error). Returns None
    # for 401 and 404 errors, which requires special handling by the caller.
    def handle_twitter_http_error(e, wait_period=2, sleep_when_rate_limited=True):

        if wait_period > 3600:   # Seconds
            print >> sys.stderr, 'Too many retries. Quitting.'
            raise e

        # See https://dev.twitter.com/docs/error-codes-responses for common codes
        if e.e.code == 401:
            print >> sys.stderr, 'Encountered 401 Error (Not Authorized)'
            return None
        elif e.e.code == 404:
            print >> sys.stderr, 'Encountered 404 Error (Not Found)'
            return None
        elif e.e.code == 429:
            print >> sys.stderr, 'Encountered 429 Error (Rate Limit Exceeded)'
            if sleep_when_rate_limited:
                print >> sys.stderr, "Retrying in 15 minutes...ZzZ..."
                sys.stderr.flush()
                time.sleep(60*15 + 5)
                print >> sys.stderr, '...ZzZ...Awake now and trying again.'
                return 2
            else:
                raise e   # Caller must handle the rate limiting issue
        elif e.e.code in (500, 502, 503, 504):
            print >> sys.stderr, 'Encountered %i Error. Retrying in %i seconds' % \
                (e.e.code, wait_period)
            time.sleep(wait_period)
            wait_period *= 1.5
            return wait_period
        else:
            raise e

    # End of nested helper function

    wait_period = 2
    error_count = 0

    while True:
        try:
            return twitter_api_func(*args, **kw)
        except twitter.api.TwitterHTTPError, e:
            error_count = 0
            wait_period = handle_twitter_http_error(e, wait_period)
            if wait_period is None:
                return
        except URLError, e:
            error_count += 1
            time.sleep(wait_period)
            wait_period *= 1.5
            print >> sys.stderr, "URLError encountered. Continuing."
            if error_count > max_errors:
                print >> sys.stderr, "Too many consecutive errors...bailing out."
                raise
        except BadStatusLine, e:
            error_count += 1
            time.sleep(wait_period)
            wait_period *= 1.5
            print >> sys.stderr, "BadStatusLine encountered. Continuing."
            if error_count > max_errors:
                print >> sys.stderr, "Too many consecutive errors...bailing out."
                raise
# See https://dev.twitter.com/docs/api/1.1/get/users/lookup for
# twitter_api.users.lookup
def get_user_profile(twitter_api, screen_names=None, user_ids=None):

    # Must have either screen_name or user_id (logical xor)
    assert (screen_names != None) != (user_ids != None), \
        "Must have screen_names or user_ids, but not both"

    items_to_info = {}
    items = screen_names or user_ids

    while len(items) > 0:

        # Process 100 items at a time per the API specifications for /users/lookup.
        # See https://dev.twitter.com/docs/api/1.1/get/users/lookup for details.
        items_str = ','.join([str(item) for item in items[:100]])
        items = items[100:]

        if screen_names:
            response = make_twitter_request(twitter_api.users.lookup,
                                            screen_name=items_str)
        else:   # user_ids
            response = make_twitter_request(twitter_api.users.lookup,
                                            user_id=items_str)

        for user_info in response:
            if screen_names:
                items_to_info[user_info['screen_name']] = user_info
            else:   # user_ids
                items_to_info[user_info['id']] = user_info

    return items_to_info
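Before running the full mining loop it can help to smoke-test get_user_profile on a single account; the screen name below is only a placeholder, not one of the libraries in the data set.

# Hypothetical smoke test for get_user_profile (the screen name is a placeholder)
sample = get_user_profile(twitter_api, screen_names=['example_library'])
for name, info in sample.items():
    print name, '|', info['location'], '|', info['description']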
In [18]:
def lookUpProfilesFriends(listOfIDs):
    '''
    input: list of IDs of Friends
    output: list of dictionaries with keys 'user_id', 'screen_name', 'location', 'description'
    '''
    LoD = []
    errorIDs = []
    profiles = get_user_profile(twitter_api, user_ids=listOfIDs)
    for e in listOfIDs:
        try:
            infoDic = {}
            infoDic['friends_user_id'] = e
            infoDic['friends_screen_name'] = profiles[e]['screen_name']
            infoDic['friends_location'] = (profiles[e]['location']).encode('utf-8')
            infoDic['friends_description'] = (profiles[e]['description']).encode('utf-8')
            LoD.append(infoDic)
        except:   # e.g. ID not returned by the API (suspended or deleted account)
            errorIDs.append(e)
    if len(errorIDs) > 0:
        print
        print 'Error for these IDs:', errorIDs
        print
    return LoD
def lookUpProfilesFollowers(listOfIDs):
    '''
    input: list of IDs of Followers
    output: list of dictionaries with keys 'user_id', 'screen_name', 'location', 'description'
    '''
    LoD = []
    errorIDs = []
    profiles = get_user_profile(twitter_api, user_ids=listOfIDs)
    for e in listOfIDs:
        try:
            infoDic = {}
            infoDic['followers_user_id'] = e
            infoDic['followers_screen_name'] = profiles[e]['screen_name']
            infoDic['followers_location'] = (profiles[e]['location']).encode('utf-8')
            infoDic['followers_description'] = (profiles[e]['description']).encode('utf-8')
            LoD.append(infoDic)
        except:   # e.g. ID not returned by the API (suspended or deleted account)
            errorIDs.append(e)
    if len(errorIDs) > 0:
        print 'Error for these IDs:', errorIDs
    return LoD
def wrapLookUp(dictOfFnFs):
    '''
    input: dict of FnFs of a lib (with keys 'followers_ids', 'friends_ids', 'screen_name' (of the lib))
    output: a list of filenames
    saves two files: <twitterhandle>_Friends_<datestamp>.csv and <twitterhandle>_Followers_<datestamp>.csv
    '''
    f1 = dictOfFnFs['friends_ids']
    f2 = dictOfFnFs['followers_ids']
    # in case the list was converted to a str (e.g. when read back from a CSV)
    if type(f1) == str and f1 != '[]':
        f11 = f1.strip('[]')
        f1 = [int(s) for s in f11.split(',')]
    if type(f2) == str and f2 != '[]':
        f21 = f2.strip('[]')
        f2 = [int(s) for s in f21.split(',')]
    if len(f1) > 0 and type(f1) == list:
        friends = lookUpProfilesFriends(f1)
    else:
        friends = []
    if len(f2) > 0 and type(f2) == list:
        followers = lookUpProfilesFollowers(f2)
    else:
        followers = []
    # creating the filenames of the CSVs with the current datestamp
    import datetime
    datestamp = datetime.datetime.now().strftime('%Y-%m-%d')
    filename_friends = dictOfFnFs['screen_name'] + '_Friends_' + datestamp + '.csv'
    filename_followers = dictOfFnFs['screen_name'] + '_Followers_' + datestamp + '.csv'
    LoFilenames = []   # [filename_friends, filename_followers]
    # export as CSV to CWD
    if len(friends) > 0:
        exp2CSV(friends, filename_friends)
        LoFilenames.append(filename_friends)
    if len(followers) > 0:
        exp2CSV(followers, filename_followers)
        LoFilenames.append(filename_followers)
    return LoFilenames
In [19]:
def FnFMining(Twitterfile, datestamp):
    '''
    input: a NatBibTwitter.csv etc. filename and the datestamp of the '_NetWork_2014-03-11.csv' files
    (the library Twitter name will be added).
    '''
    import pickle   # for saving the list to a file
    f = impCSV(Twitterfile)
    listOfFilenames = []
    for e in f:
        n = e['Twitter']   # get the Twitter handle of the library
        filename = n + '_NetWork_' + datestamp + '.csv'   # create the filename for the library
        print filename
        b = impCSV(filename)   # import this file
        p = wrapLookUp(b[0])   # get description etc. for the FnFs of the library
        print p                # print the filenames for each library
        listOfFilenames += p
    # saving the list of filenames to a file
    filename2 = Twitterfile[:-11] + '_Files.txt'   # creating a filename like UniBib_Files.txt
    print filename2
    with open(filename2, 'wb') as f:
        pickle.dump(listOfFilenames, f)
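Because FnFMining pickles the list of generated filenames, it can be read back in a later session with pickle.load; the filename below is only an example, use whatever FnFMining printed as filename2.

# Reading the pickled list of filenames back in (example filename)
import pickle
with open('NatBib_Files.txt', 'rb') as f:
    listOfFilenames = pickle.load(f)
print listOfFilenames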
In [20]:
FnFMining('NatBibTwitter2.csv', '2014-04-06')
In [7]:
FnFMining('UniBibTwitter2.csv', '2014-04-06')
In [8]:
FnFMining('OeBibTwitter2.csv', '2014-04-06')
In [ ]: